
capture cd D:\Dropbox\book_welfare\replication



	***************************************************************************
	* Goodreads sample: build the 2016 cross-section used by the regressions
	* that follow (market shares, author prior sales, female-author dummy).
	***************************************************************************
	
	
	use  data\books_year.dta, clear 
		* sales year 2016, titles published 1960-2016
		keep if year==2016
		keep if pubyr>=1960 & pubyr<=2016 
		
		* missing fbookp is recoded to 0 (o = override)
		mvencode fbookp, mv(0) o 
		
		* bring in the pubyr-level variables from Nfcap.dta; rows that exist
		* only in Nfcap.dta carry no book data, so they are not kept, and the
		* _merge marker is not created
		merge m:1 pubyr using data\Nfcap.dta, keep(master match) nogenerate
		
		* market size M = 1e8 (written as 10.00e+07)
		gen double M = 10.00e+07
	
		gen male=1-fbookp
		egen Nmale=total(male), by(pubyr)

		* overall share s, total quantity Q and outside share s0
		gen double s = q/M 
		egen Q=total(q) 
		gen double s0 = (M-Q)/M	
		
		* totals of qf and qm
		egen QF=total(qf)
		egen QM=total(qm)
		
		* M is split between the two groups in proportion to QF and QM.
		* MF and MM are stored as double, like every share derived from them,
		* so that sf, s0f, sm and s0m are not computed from a float-rounded
		* denominator.
		gen double MF = (QF/(QM+QF))*M
		gen double sf = qf/MF 
		gen double s0f = (MF-QF)/MF

		gen double MM = (QM/(QM+QF))*M
		gen double sm = qm/MM 
		gen double s0m = (MM-QM)/MM

	
		* A: per author_id, total q over titles with pubyr<year, spread to all
		* of that author's rows; authors with no such title get 0
		egen xA= total(q) if pubyr<year, by(author_id)
		egen A=max(xA), by(author_id)
		
		mvencode A, mv(0) 
		
		gen lA = ln(A+1)
		* dmiss flags A==0
		gen dmiss=A==0 
	
		gen dfemale=fbookp

		keep if q~=. 

		* variables carried forward to the regressions
		keep s s0 q Q fbookp  pubyr Nfcap genre lA dmiss   s0m sm s0f sf dfemale  

		egen gno =group(genre)
		 		
		gen double delta = log(s) - log(s0) 
		gen lq = ln(q)
		
		* Rsq/spec: the first rows are filled in after each fit so the
		* R-squared values can be graphed by specification
		gen Rsq=. 
		gen spec = ""
		
		***********************
		* parsimonious specification (Goodreads)
		***********************

		label variable lq "ln q"
		label variable fbookp "female author"

		* --- all publication years, with publication-year dummies ---
		regress lq  lA i.gno dmiss fbookp i.pubyr
		estimates save estimates\simple_all, replace
		* row 1 of Rsq/spec records this fit
		replace Rsq = e(r2) in 1
		replace spec = "parsimonious, all years" in 1
		* fitted value and absolute residual
		predict dhat_simple_all, xb
		generate sig_simple_all = ((lq - dhat_simple_all)^2)^0.5


		************************
		* check R-sq without past sales
		regress lq   i.gno  fbookp if pubyr==2016
		************************


		* --- titles published in 2016 only ---
		regress lq  lA i.gno dmiss fbookp if pubyr==2016
		estimates save estimates\simple_2016, replace
		* row 2 of Rsq/spec records this fit
		replace Rsq = e(r2) in 2
		replace spec = "parsimonious, 2016" in 2
		* fitted value and absolute residual, 2016 titles only
		predict dhat_simple_2016 if pubyr==2016, xb
		generate sig_simple_2016 = ((lq - dhat_simple_2016)^2)^0.5 if pubyr==2016

		label variable dhat_simple_2016 "ln qhat"
		* label var dhat1_2016 "ln qhat"
		***********************
		* saturated specification (Goodreads):
		* lA, fbookp and genre fully interacted
		***********************

		* --- all publication years ---
		regress lq  c.lA##i.fbookp##i.gno  i.pubyr  dmiss
		estimates save estimates\saturated_all , replace
		* the saved results are read back before predicting
		estimates use estimates\saturated_all

		* row 3 of Rsq/spec records this fit
		replace Rsq = e(r2) in 3
		replace spec = "saturated, all years" in 3

		predict dhat_sat_all, xb
		generate sig_sat_all = ((lq - dhat_sat_all)^2)^0.5


		* --- titles published in 2016 only ---
		regress lq  c.lA##i.fbookp##i.gno    dmiss if pubyr==2016
		estimates save estimates\saturated_2016 , replace
		estimates use estimates\saturated_2016

		* row 4 of Rsq/spec records this fit
		replace Rsq = e(r2) in 4
		replace spec = "saturated, 2016" in 4

		* NOTE(review): unlike the parsimonious 2016 fit, the prediction and
		* residual here are not restricted to pubyr==2016 - confirm intended
		predict dhat_sat_2016, xb
		generate sig_sat_2016 = ((lq - dhat_sat_2016)^2)^0.5
		
		****************************************************
		* compare the r-squared terms across models
		* (one bar per non-empty spec label, height = stored Rsq)
			graph bar (mean) Rsq, ///
				over(spec, label(angle(forty_five))) ///
				blabel(bar, format(%9.3f)) ///
				scheme(lean2)
		****************************************************


			* labels for the all-years fitted values and absolute residuals
			label variable dhat_simple_all "E[delta], simple"
			label variable sig_simple_all "sigma, simple"
			label variable dhat_sat_all "E[delta], saturated"
			label variable sig_sat_all "sigma, saturated"

		* absolute residual regressed on the fitted value, parsimonious model
		regress sig_simple_all  dhat_simple_all
		estimates save estimates\simple_res, replace

		* same regression for the saturated model
		regress sig_sat_all  dhat_sat_all
		estimates save estimates\saturated_res, replace


 ********************
 ********************
 * Bookstat
 ********************
 ********************
 
	 
		 
capture cd D:\Dropbox\book_welfare\replication\


	* start from a clean session; maxvar is raised before loading the file
	clear 
	clear matrix 
	clear mata 
	set maxvar 20000

	
use  data\bookstat_2021_welfare_asin.dta, clear  


		* sales year 2021 with q>0, titles published 1960-2021
		keep if year==2021 & q>0 
		keep if pyear>=1960 & pyear<=2021 

		* female-author dummy: 1 when mshare<0.5, 0 otherwise
		* (missing mshare is coded 0)
		gen dfemale = mshare<0.5 & !missing(mshare)
			
		* Asimple: per authorno, total q over titles with pyear<year, spread
		* to all of that author's rows; authors with no such title get 0
		egen xA= total(q) if pyear<year, by(authorno)
		egen Asimple=max(xA), by(authorno)
		mvencode Asimple, mv(0) 
		
		
		* A: value of the q20xx variable for the year before publication.
		* The original repeated the pyear==2018 line twice, which left 2019
		* titles with missing A; the first copy is now the 2019 case.
		gen A = q2020 if pyear==2021 
		replace A = q2019 if pyear==2020 
		replace A = q2018 if pyear==2019 
		* NOTE(review): 2018 titles get q2018, the publication year itself,
		* rather than the year before - confirm whether this is intended
		replace A = q2018 if pyear==2018 
		
			
		gen lA = ln(A+1)
		* NOTE(review): dmiss is 0, not 1, where A is missing (titles
		* published before 2018); those rows also have missing lA
		gen dmiss=A==0 

		gen lAsimple = ln(Asimple+1)
		* flag for Asimple==0 (was built from A by mistake)
		gen dmiss_simple=Asimple==0 
	 
		* market size M = 1e9 (written as 10e+08); shares stored as double,
		* as in the Goodreads section
		egen Q = total(q)
		gen double M = 10e+08
		gen double s=q/M
		gen double s0 = (M-Q)/M

		gen  fbook =dfemale 
		rename pyear pubyr 

		egen gno=group(genre)

		* variables carried forward to the regressions
		keep s s0 q Q fbook  pubyr  genre lA dmiss lAsimple dmiss_simple  dfemale gno  
		
		
		* sigma is fixed at 0, so the last term of delta drops out
		gen sigma = 0
		
		gen double delta = log(s) - log(s0) - sigma*log(q/Q)
		gen lq = ln(q)
		
		label var lq "ln q"
		label var lA "log author prior sales"
		label var dmiss "missing author sales"
		label var fbook "female author"
		label var lAsimple "log author prior sales"
		label var dmiss_simple "missing author sales"
		
		
		
		* check without past sales: 
		reg lq   i.gno  fbook i.pubyr if pubyr==2021
		

		
		***************************************
		* simple 2021
		*********************************************
		
		* Fitted values and absolute residuals of the 2021-only models are
		* generated for 2021 titles only, matching the Goodreads 2016 fit, so
		* the *_2021_bs_res regressions are not run on other publication years.
		reg lq  lA i.gno dmiss fbook i.pubyr if pubyr==2021
		estimates save estimates\simple_2021_bs, replace 
		predict qhat_simple_2021 if pubyr==2021
		gen sig_simple_2021_bs = ((lq - qhat_simple_2021)^2)^.5 if pubyr==2021

		
		* same specification on titles published 2019 or later (not saved)
		reg lq  lA i.gno dmiss fbook i.pubyr if pubyr>=2019 
		
		* simple, all publication years, using lAsimple and dmiss_simple
		reg lq  lAsimple i.gno dmiss_simple fbook i.pubyr  
		estimates save estimates\simple_all_bs, replace 
		predict qhat_simple_all 
		gen sig_simple_all_bs = ((lq - qhat_simple_all)^2)^.5 
		

		 
		
		
		* bs: saturated  
		reg lq  c.lA##i.fbook##i.gno    dmiss if  pubyr==2021
 		estimates save estimates\saturated_2021_bs, replace 
		predict qhat_sat_2021 if pubyr==2021
		gen sig_sat_2021_bs = ((lq - qhat_sat_2021)^2)^.5 if pubyr==2021
		
		reg lq  c.lAsimple##i.fbook##i.gno    dmiss_simple i.pubyr
 		estimates save estimates\saturated_all_bs, replace 
		predict qhat_sat_all 
		gen sig_sat_all_bs = ((lq - qhat_sat_all)^2)^.5 
		
		
		 
		label var sig_simple_2021_bs "sig sparse 2021"
		label var sig_simple_all_bs "sig sparse all"
		label var sig_sat_2021_bs "sig saturated 2021"
		label var sig_sat_all_bs "sig saturated all"

		label var qhat_simple_2021 "ln qhat"
		label var qhat_simple_all "ln qhat"
		label var qhat_sat_2021 "ln qhat"
		label var qhat_sat_all "ln qhat"

	* absolute residual regressed on the fitted value, one per model
	 reg sig_simple_2021_bs qhat_simple_2021
 	estimates save estimates\simple_2021_bs_res, replace 
	 reg sig_simple_all_bs qhat_simple_all
 	estimates save estimates\simple_all_bs_res, replace 
	 reg sig_sat_2021_bs qhat_sat_2021 
 	estimates save estimates\saturated_2021_bs_res, replace 
	 reg sig_sat_all_bs qhat_sat_all 
 	estimates save estimates\saturated_all_bs_res, replace 
	
	********************
	* make regression table
	* Each saved result is loaded and replayed by a bare regress so that
	* eststo stores it; the order below is the column order of the table.
	* NOTE(review): the female-author coefficient is named fbookp in the
	* first four models and fbook in the last four, so it will presumably
	* occupy two rows - confirm this is intended.
eststo clear

	* columns 1-4: Goodreads
	estimates use estimates\simple_2016
	eststo: regress
	estimates use estimates\simple_all
	eststo: regress
	estimates use estimates\simple_res
	eststo: regress
	estimates use estimates\saturated_res
	eststo: regress


	* columns 5-8: Bookstat
	estimates use estimates\simple_2021_bs
	eststo: regress
	estimates use estimates\simple_all_bs
	eststo: regress
	estimates use estimates\simple_all_bs_res
	eststo: regress
	estimates use estimates\saturated_all_bs_res
	eststo: regress

local pattern prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})

	* on-screen version of the table
	esttab,  noomitted  se replace  label nonotes ///
		star(* 0.10 ** 0.05 *** 0.01) ///
		scalars("r2_a $\overline{R^2}$")  ///
		drop(*.gno *.pubyr) ///
		mtitles("ln q, 2016 GR" "ln q, GR" "$\sigma$, sparse" "$\sigma$, saturated" ///
			"ln q, 2021 BS" "ln q, BS" "$\sigma$, sparse" "$\sigma$, saturated")


	* same table written to a LaTeX file, with booktabs rules
	esttab using latex_text\tables\prediction_asin.tex,  noomitted  se replace  label nonotes ///
		star(* 0.10 ** 0.05 *** 0.01) ///
		scalars("r2_a $\overline{R^2}$")  ///
		drop(*.gno *.pubyr) ///
		mtitles("ln q, 2016 GR" "ln q, GR" "$\sigma$, sparse" "$\sigma$, saturated" ///
			"ln q, 2021 BS" "ln q, BS" "$\sigma$, sparse" "$\sigma$, saturated") ///
		booktabs
	
	